#------------------------------------------------------------------------------
# Use the combined individual-level data to build the analysis datasets
# and export them in Stata (.dta) format for statistical tests.
#==============================================================================

load_library = c('bit64','data.table','fst','future.apply','stringr','logger','vroom','rio','stringr')
invisible(lapply(load_library, function(x) library(x, character.only=TRUE, quietly= TRUE)))

# Root directory of the project storage; all paths below are built from it.
bucket <- '/N/project/iuni_doctorshopping/'

# Read the weekly individual-level table from fst into a data.table.
weekly_individuals_file <- file.path(
	bucket, 'projects', 'covid_opioid', 'data', 'processed_data',
	'weekly_ses_individuals.fst'
)
ind_combined <- read_fst(weekly_individuals_file, as.data.table = TRUE)

#==============
# figure 3. trends for transition
#==============

# Keep only rows with targetpain == 1. Row-subsetting returns a new data.table,
# so the := updates below do not modify ind_combined.
pain_ind_combined <- ind_combined[targetpain == 1, ]

# Treatment state of the current row (state_t0):
#   0 = no opioids, no therapy      1 = opioids only
#   2 = therapy only                3 = opioids and therapy
# Rows where either flag is NA or not 0/1 match none of the four filters, so
# their state_t0 is never assigned (NA in a newly created column).
pain_ind_combined[opioids == 0 & therapy == 0, state_t0 := 0]
pain_ind_combined[opioids == 1 & therapy == 0, state_t0 := 1]
pain_ind_combined[opioids == 0 & therapy == 1, state_t0 := 2]
pain_ind_combined[opioids == 1 & therapy == 1, state_t0 := 3]

# Count rows per patient-year. A patient-year with a single row has no
# following row to transition to, so those rows are dropped below.
# NOTE(review): the per-period summary later in this script sums an 'n_week'
# column of ind_combined, so the input appears to already carry a column with
# this name; this assignment overwrites it in the pain subset (and therefore in
# the exported transition file) -- confirm that is intended.
pain_ind_combined[, n_week := .N, by = c('PATID', 'year')]

# Report how many rows are about to be dropped, in total and by year.
# `.N` counts the matching rows without first materialising the subset, and
# print() keeps the table visible when the script is run through source().
message('now drop ... n.obs ...', pain_ind_combined[n_week == 1, .N])
print(pain_ind_combined[n_week == 1, table(year)])
# counts recorded from a previous run:
#year
#   2019    2020 
#2058699 1908567 

pain_ind_combined <- pain_ind_combined[n_week >= 2, ]

# Sort so that rows inside each patient-year group are in weeknum order.
setorder(pain_ind_combined, PATID, weeknum)

# state_t1 = state_t0 of the next row within the same patient-year.
pain_ind_combined[, state_t1 := shift(state_t0, n = 1, type = 'lead'),
	by = c('PATID', 'year')]

# state_t1 is NA for the last row of every patient-year (no "next" visit) and
# for any row whose next row has an NA state_t0; all of these are removed.
print(pain_ind_combined[is.na(state_t1), .N]) # 7147912 in a previous run
pain_ind_combined <- pain_ind_combined[!is.na(state_t1), ]


# Save the row-level transition table (state_t0 -> state_t1) as a Stata file
# for the transition analysis.
transition_file <- file.path(
	bucket, 'projects', 'covid_opioid', 'data', 'processed_data',
	'pain_treatment_transition.dta'
)
rio::export(pain_ind_combined, transition_file)

# Dataset for all pain-related statistical tests: one row per
# PATID x year x period holding the sum of each listed column.
# Rows with a missing period are excluded; NAs are skipped when summing.
summed_columns <- c(
	'n_week', 'targetpain', 'backpain', 'neckpain', 'limbpain',
	'opioids', 'therapy', 'sum_opioid_days', 'sum_opioid_mme'
)
sum_ind_combined <- ind_combined[
	!is.na(period),
	lapply(.SD, sum, na.rm = TRUE),
	by = c('PATID', 'year', 'period'),
	.SDcols = summed_columns
]

# Destination of the per-period summary in Stata format.
outfile <- file.path(
	bucket, 'projects', 'covid_opioid', 'data', 'processed_data',
	'ses_individual_period_opioid_therapy_2019_0101_0930.dta'
)
logger::log_info('now writing data ... individual')

rio::export(sum_ind_combined, outfile)

